{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "This file is for replicating Table 5.\n",
    "You need invent_location, NBER_class, states_order, all control files.\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "starting_year = [1976, 1986, 1996, 2006]\n",
    "end_year = [1985, 1995, 2005, 2015]\n",
    "\n",
    "LOC = pd.read_csv('invent_location.csv', sep='*')\n",
    "NBER_class = pd.read_csv('NBER_class.csv', sep='\\t')\n",
    "states_order = pd.read_csv('states_order.csv')\n",
    "name_order = states_order.sort_values(by=['order'])['name'].reset_index(drop=True)\n",
    "states_order = states_order.sort_values(by=['order'])['state'].reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"Set control criteria name and a rounding decision\"\"\"\n",
    "r = 2 # rounding"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:37: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:57: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:71: RuntimeWarning: invalid value encountered in double_scalars\n"
     ]
    }
   ],
   "source": [
    "namespace = globals()\n",
    "\n",
    "Citing_1976 = []; Citing_1986 = []; Citing_1996 = []; Citing_2006 = []\n",
    "Control_1976 = []; Control_1986 = []; Control_1996 = []; Control_2006 = []\n",
    "t_1976 = []; t_1986 = []; t_1996 = []; t_2006 = []\n",
    "\n",
    "for x, y in zip(starting_year, end_year):\n",
    "    \n",
    "    p_hat_citing_list = []\n",
    "    SE_hat_citing_list = []\n",
    "    \n",
    "    for z in [1,4]:\n",
    "                \n",
    "        file_in = 'controls' + str(z) + '_' + str(x) + '_' + str(y) +'.csv'\n",
    "        www = pd.read_csv(file_in, sep=','); www = www[['cited','citing','citing_control','period']]\n",
    "        www = pd.merge(www,LOC,left_on='cited',right_on='wku',how='inner'); www = www.drop('wku', axis=1)\n",
    "        www.columns = ['cited','citing','control','period','cnt_cited','sta_cited','cmsa_cited']\n",
    "        www = www[www.cnt_cited == \"US\"] # cited가 미국 특허인 것만 남김\n",
    "\n",
    "        www = www[www.period > 0]\n",
    "        www = pd.merge(www,LOC,left_on='citing',right_on='wku',how='inner'); www = www.drop('wku', axis=1)\n",
    "        www.columns = ['cited','citing','control','period','cnt_cited', 'sta_cited','cmsa_cited','cnt_citing','sta_citing','cmsa_citing']\n",
    "        www = pd.merge(www,LOC,left_on='control',right_on='wku',how='inner'); www = www.drop('wku', axis=1)\n",
    "        www.columns = ['cited','citing','control','period','cnt_cited','sta_cited','cmsa_cited','cnt_citing','sta_citing','cmsa_citing','cnt_control','sta_control','cmsa_control']\n",
    "        www = pd.merge(www,NBER_class[['wku','subcat']],left_on='cited',right_on='wku',how='inner'); www = www.drop('wku', axis=1)\n",
    "        www = www.rename(columns = {'subcat':'subcat_cited'})\n",
    "        www['x'] = 1\n",
    "\n",
    "        for [i, state] in enumerate(states_order):\n",
    "            \n",
    "            www_temp = www[www['sta_cited']==state]\n",
    "            \n",
    "            for location in ['sta']:\n",
    "                \n",
    "                if z==1:\n",
    "\n",
    "                    www_temp['cited_equals_citing_'+location] = (www_temp[location+'_cited']==www_temp[location+'_citing'])\n",
    "\n",
    "                    citing_calc = www_temp[['x','cited_equals_citing_'+location,'sta_cited','subcat_cited']]\n",
    "                    #if FOREIGN==1: citing_calc = citing_calc.replace(np.nan, 'FOREIGN')\n",
    "                    citing_calc = citing_calc.groupby(['sta_cited','subcat_cited']).sum()\n",
    "                    citing_calc['p_ij'] = citing_calc['cited_equals_citing_'+location]/citing_calc['x']\n",
    "                    citing_calc['w_ij'] = citing_calc['x']/citing_calc['x'].sum()\n",
    "                    \n",
    "                    citing_calc['w_p'] = citing_calc['p_ij']*citing_calc['w_ij']\n",
    "                    if len(citing_calc) ==0: p_hat_citing = np.nan\n",
    "                    else: p_hat_citing = citing_calc.sum()['w_p']\n",
    "                    p_hat_citing_list.append(p_hat_citing)\n",
    "                    citing_calc['ww_pp'] = (citing_calc['p_ij']-p_hat_citing)**2*citing_calc['w_ij']**2\n",
    "                    SE_hat_citing = np.sqrt(citing_calc.sum()['ww_pp'])\n",
    "                    SE_hat_citing_list.append(SE_hat_citing)\n",
    "                    \n",
    "                    namespace['Citing_%d' % x].append(round(p_hat_citing*100,r))\n",
    "                    \n",
    "                else: \n",
    "\n",
    "                    www_temp['cited_equals_control_'+location] = (www_temp[location+'_cited']==www_temp[location+'_control'])\n",
    "\n",
    "                    control_calc = www_temp[['x','cited_equals_control_'+location,'sta_cited','subcat_cited']]                    \n",
    "                    #if FOREIGN==1: control_calc = control_calc.replace(np.nan, 'FOREIGN')\n",
    "                    control_calc = control_calc.groupby(['sta_cited','subcat_cited']).sum()\n",
    "                    control_calc['p_ij'] = control_calc['cited_equals_control_'+location]/control_calc['x']\n",
    "                    control_calc['w_ij'] = control_calc['x']/control_calc['x'].sum()\n",
    "                    control_calc['w_p'] = control_calc['p_ij']*control_calc['w_ij']\n",
    "                    if len(control_calc) ==0: p_hat_control = np.nan\n",
    "                    else: p_hat_control = control_calc.sum()['w_p']\n",
    "                    control_calc['ww_pp'] = (control_calc['p_ij']-p_hat_control)**2*control_calc['w_ij']**2\n",
    "                    SE_hat_control = np.sqrt(control_calc.sum()['ww_pp'])\n",
    "\n",
    "                    ########t-value SE calculation method                  \n",
    "                    t_value = (p_hat_citing_list[i]-p_hat_control)/np.sqrt(SE_hat_citing_list[i]**2+SE_hat_control**2)\n",
    "\n",
    "                    namespace['Control_%d' % x].append(round(p_hat_control*100,r))\n",
    "                    namespace['t_%d' % x].append(round(t_value,r))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = pd.DataFrame()\n",
    "\n",
    "for x, y in zip(starting_year, end_year):\n",
    "    Result = pd.DataFrame(columns=[str(x), str(y), str(x)+'_t'])\n",
    "    \n",
    "    for i in range(len(name_order)):\n",
    "        if np.isnan(namespace['Citing_%d' % x][i]) == 1: namespace['Citing_%d' % x][i] = '-'\n",
    "        if np.isnan(namespace['Control_%d' % x][i]) == 1: namespace['Control_%d' % x][i] = '-'\n",
    "        if np.isnan(namespace['t_%d' % x][i]) == 1: namespace['t_%d' % x][i] = '-'\n",
    "        Result.loc[name_order[i]] = [namespace['Citing_%d' % x][i], namespace['Control_%d' % x][i], '(' + str(namespace['t_%d' % x][i]) + ')']\n",
    "   \n",
    "    result = pd.concat([result, Result], axis = 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "print(result.to_latex())"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
